LinkedIn Web Scraping with Bright Data MCP Server & Google Gemini
工作流概述
这是一个包含20个节点的复杂工作流,主要用于自动化处理各种任务。
工作流源代码
{
"id": "D2RkoPZlkKFRUrNu",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
"templateCredsSetupCompleted": true
},
"name": "LinkedIn Web Scraping with Bright Data MCP Server & Google Gemini",
"tags": [
{
"id": "ZOwtAMLepQaGW76t",
"name": "Building Blocks",
"createdAt": "2025-04-13T15:23:40.462Z",
"updatedAt": "2025-04-13T15:23:40.462Z"
},
{
"id": "ddPkw7Hg5dZhQu2w",
"name": "AI",
"createdAt": "2025-04-13T05:38:08.053Z",
"updatedAt": "2025-04-13T05:38:08.053Z"
}
],
"nodes": [
{
"id": "68715d64-ce99-4e23-81ed-fe8f7d08ebd7",
"name": "When clicking ‘Test workflow’",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-640,
-50
],
"parameters": {},
"typeVersion": 1
},
{
"id": "e0295397-2926-4964-8be5-c0341de29a02",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-260,
-420
],
"parameters": {
"color": 3,
"width": 440,
"height": 320,
"content": "## Bright Data LinkedIn Person Scraper"
},
"typeVersion": 1
},
{
"id": "cdf42164-569e-4140-9847-4751d69c6b7b",
"name": "Set the URLs",
"type": "n8n-nodes-base.set",
"position": [
-200,
-300
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "214e61a0-3587-453f-baf5-eac013990857",
"name": "url",
"type": "string",
"value": "https://www.linkedin.com/in/ranjan-dailata/"
},
{
"id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
"name": "webhook_url",
"type": "string",
"value": "https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "5769fce6-bcd7-4a13-b992-cd6d955a2cf1",
"name": "Bright Data MCP Client For LinkedIn Person",
"type": "n8n-nodes-mcp.mcpClient",
"notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language.",
"position": [
20,
-300
],
"parameters": {
"toolName": "web_data_linkedin_person_profile",
"operation": "executeTool",
"toolParameters": "={
\"url\": \"{{ $json.url }}\"
} "
},
"credentials": {
"mcpClientApi": {
"id": "JtatFSfA2kkwctYa",
"name": "MCP Client (STDIO) account"
}
},
"notesInFlow": true,
"typeVersion": 1
},
{
"id": "56e37aa6-9719-4879-80af-a10c091377fb",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-260,
-60
],
"parameters": {
"color": 4,
"width": 440,
"height": 320,
"content": "## Bright Data LinkedIn Company Scraper"
},
"typeVersion": 1
},
{
"id": "69afab25-32c6-4849-b2f9-4a2b25657c37",
"name": "List all tools for Bright Data",
"type": "n8n-nodes-mcp.mcpClient",
"position": [
-420,
50
],
"parameters": {},
"credentials": {
"mcpClientApi": {
"id": "JtatFSfA2kkwctYa",
"name": "MCP Client (STDIO) account"
}
},
"typeVersion": 1
},
{
"id": "feb16a2b-fdf7-49d4-bcd5-848ccaf66639",
"name": "Bright Data MCP Client For LinkedIn Company",
"type": "n8n-nodes-mcp.mcpClient",
"notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language.",
"position": [
20,
50
],
"parameters": {
"toolName": "web_data_linkedin_company_profile",
"operation": "executeTool",
"toolParameters": "={
\"url\": \"{{ $json.url }}\"
} "
},
"credentials": {
"mcpClientApi": {
"id": "JtatFSfA2kkwctYa",
"name": "MCP Client (STDIO) account"
}
},
"notesInFlow": true,
"typeVersion": 1
},
{
"id": "e5117eb1-a757-4c28-965e-87ea03213ed1",
"name": "Set the LinkedIn Company URL",
"type": "n8n-nodes-base.set",
"position": [
-200,
50
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "214e61a0-3587-453f-baf5-eac013990857",
"name": "url",
"type": "string",
"value": "https://www.linkedin.com/company/bright-data/"
},
{
"id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
"name": "webhook_url",
"type": "string",
"value": "https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "99f45d7f-ad79-4ffc-8299-c71bd870f8fb",
"name": "Webhook for LinkedIn Company Web Scraper",
"type": "n8n-nodes-base.httpRequest",
"position": [
1060,
40
],
"parameters": {
"url": "={{ $('Set the LinkedIn Company URL').item.json.webhook_url }}",
"options": {},
"jsonBody": "={
\"about\": {{ JSON.stringify($json.about[0]) }},
\"story\": {{ JSON.stringify($json.company_story[0]) }}
}",
"sendBody": true,
"specifyBody": "json"
},
"typeVersion": 4.2
},
{
"id": "5dfd2630-17d9-4a13-8cd6-57a564ef4a26",
"name": "LinkedIn Data Extractor",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
240,
200
],
"parameters": {
"text": "=Write a complete story of the provided company information in JSON. Use the following Company info to produce a story or a blog post. Make sure to incorporate all the provided company context.
Here's the Company Info in JSON - {{ $json.input }}",
"options": {
"systemPromptTemplate": "You are an expert data formatter"
},
"attributes": {
"attributes": [
{
"name": "company_story",
"required": true,
"description": "Detailed Company Info"
}
]
}
},
"typeVersion": 1
},
{
"id": "d1927c08-5ded-4b0b-b60b-bed126040d38",
"name": "Google Gemini Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
328,
420
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "0de1d200-c35a-41df-b512-8b97b92f14db",
"name": "List all available tools for Bright Data",
"type": "n8n-nodes-mcp.mcpClient",
"position": [
-420,
-300
],
"parameters": {},
"credentials": {
"mcpClientApi": {
"id": "JtatFSfA2kkwctYa",
"name": "MCP Client (STDIO) account"
}
},
"typeVersion": 1
},
{
"id": "3f884694-b8f3-478a-b1a3-f46326a0c96f",
"name": "Code",
"type": "n8n-nodes-base.code",
"position": [
318,
-100
],
"parameters": {
"jsCode": "jsonContent = JSON.parse($input.first().json.result.content[0].text)
return jsonContent
"
},
"typeVersion": 2
},
{
"id": "67036198-4d7d-42d9-93cf-ffc65649bae0",
"name": "Merge",
"type": "n8n-nodes-base.merge",
"position": [
616,
50
],
"parameters": {},
"typeVersion": 3.1
},
{
"id": "77423290-bd08-4dc8-9f37-cf8fec9f6a63",
"name": "Aggregate",
"type": "n8n-nodes-base.aggregate",
"position": [
836,
50
],
"parameters": {
"options": {},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "about"
},
{
"fieldToAggregate": "output.company_story"
}
]
}
},
"typeVersion": 1
},
{
"id": "91d25405-afb3-4ed6-b8fa-52ab64a654e2",
"name": "Create a binary data for LinkedIn person info extract",
"type": "n8n-nodes-base.function",
"position": [
320,
-500
],
"parameters": {
"functionCode": "items[0].binary = {
data: {
data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')
}
};
return items;"
},
"typeVersion": 1
},
{
"id": "3e74c49e-eb31-43b1-b8e1-ed960bd83ca1",
"name": "Write the LinkedIn person info to disk",
"type": "n8n-nodes-base.readWriteFile",
"position": [
520,
-500
],
"parameters": {
"options": {},
"fileName": "d:\LinkedIn-Person.json",
"operation": "write"
},
"typeVersion": 1
},
{
"id": "f92b3505-2af6-42aa-bf4b-8b7b6cb97364",
"name": "Create a binary data for LinkedIn company info extract",
"type": "n8n-nodes-base.function",
"position": [
1000,
-180
],
"parameters": {
"functionCode": "items[0].binary = {
data: {
data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')
}
};
return items;"
},
"typeVersion": 1
},
{
"id": "6ed1402b-4858-4311-bede-f0b8f28acb9f",
"name": "Write the LinkedIn company info to disk",
"type": "n8n-nodes-base.readWriteFile",
"position": [
1220,
-180
],
"parameters": {
"options": {},
"fileName": "d:\LinkedIn-Company.json",
"operation": "write"
},
"typeVersion": 1
},
{
"id": "335efc2b-80e3-4fac-b31f-82fff4ac4e65",
"name": "Webhook for LinkedIn Person Web Scraper",
"type": "n8n-nodes-base.httpRequest",
"position": [
318,
-300
],
"parameters": {
"url": "={{ $('Set the URLs').item.json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.result.content[0].text }}"
}
]
}
},
"typeVersion": 4.2
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "35815900-1729-40c7-b128-778eabb62ec1",
"connections": {
"Code": {
"main": [
[
{
"node": "Merge",
"type": "main",
"index": 0
}
]
]
},
"Merge": {
"main": [
[
{
"node": "Aggregate",
"type": "main",
"index": 0
}
]
]
},
"Aggregate": {
"main": [
[
{
"node": "Webhook for LinkedIn Company Web Scraper",
"type": "main",
"index": 0
},
{
"node": "Create a binary data for LinkedIn company info extract",
"type": "main",
"index": 0
}
]
]
},
"Set the URLs": {
"main": [
[
{
"node": "Bright Data MCP Client For LinkedIn Person",
"type": "main",
"index": 0
}
]
]
},
"LinkedIn Data Extractor": {
"main": [
[
{
"node": "Merge",
"type": "main",
"index": 1
}
]
]
},
"Google Gemini Chat Model": {
"ai_languageModel": [
[
{
"node": "LinkedIn Data Extractor",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Set the LinkedIn Company URL": {
"main": [
[
{
"node": "Bright Data MCP Client For LinkedIn Company",
"type": "main",
"index": 0
}
]
]
},
"List all tools for Bright Data": {
"main": [
[
{
"node": "Set the LinkedIn Company URL",
"type": "main",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "List all available tools for Bright Data",
"type": "main",
"index": 0
},
{
"node": "List all tools for Bright Data",
"type": "main",
"index": 0
}
]
]
},
"Webhook for LinkedIn Person Web Scraper": {
"main": [
[]
]
},
"List all available tools for Bright Data": {
"main": [
[
{
"node": "Set the URLs",
"type": "main",
"index": 0
}
]
]
},
"Bright Data MCP Client For LinkedIn Person": {
"main": [
[
{
"node": "Webhook for LinkedIn Person Web Scraper",
"type": "main",
"index": 0
},
{
"node": "Create a binary data for LinkedIn person info extract",
"type": "main",
"index": 0
}
]
]
},
"Bright Data MCP Client For LinkedIn Company": {
"main": [
[
{
"node": "Code",
"type": "main",
"index": 0
},
{
"node": "LinkedIn Data Extractor",
"type": "main",
"index": 0
}
]
]
},
"Create a binary data for LinkedIn person info extract": {
"main": [
[
{
"node": "Write the LinkedIn person info to disk",
"type": "main",
"index": 0
}
]
]
},
"Create a binary data for LinkedIn company info extract": {
"main": [
[
{
"node": "Write the LinkedIn company info to disk",
"type": "main",
"index": 0
}
]
]
}
}
}
功能特点
- 自动检测新邮件
- AI智能内容分析
- 自定义分类规则
- 批量处理能力
- 详细的处理日志
技术分析
节点类型及作用
- Manualtrigger
- Stickynote
- Set
- N8N Nodes Mcp.Mcpclient
- Httprequest
复杂度评估
配置难度:
维护难度:
扩展性:
实施指南
前置条件
- 有效的Gmail账户
- n8n平台访问权限
- Google API凭证
- AI分类服务订阅
配置步骤
- 在n8n中导入工作流JSON文件
- 配置Gmail节点的认证信息
- 设置AI分类器的API密钥
- 自定义分类规则和标签映射
- 测试工作流执行
- 配置定时触发器(可选)
关键参数
| 参数名称 | 默认值 | 说明 |
|---|---|---|
| maxEmails | 50 | 单次处理的最大邮件数量 |
| confidenceThreshold | 0.8 | 分类置信度阈值 |
| autoLabel | true | 是否自动添加标签 |
最佳实践
优化建议
- 定期更新AI分类模型以提高准确性
- 根据邮件量调整处理批次大小
- 设置合理的分类置信度阈值
- 定期清理过期的分类规则
安全注意事项
- 妥善保管API密钥和认证信息
- 限制工作流的访问权限
- 定期审查处理日志
- 启用双因素认证保护Gmail账户
性能优化
- 使用增量处理减少重复工作
- 缓存频繁访问的数据
- 并行处理多个邮件分类任务
- 监控系统资源使用情况
故障排除
常见问题
邮件未被正确分类
检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。
Gmail认证失败
确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。
调试技巧
- 启用详细日志记录查看每个步骤的执行情况
- 使用测试邮件验证分类逻辑
- 检查网络连接和API服务状态
- 逐步执行工作流定位问题节点
错误处理
工作流包含以下错误处理机制:
- 网络超时自动重试(最多3次)
- API错误记录和告警
- 处理失败邮件的隔离机制
- 异常情况下的回滚操作